********************************************************************************
** 	TITLE:		merge				                                          ** 	
**  AUTHORS:	P. Mongrain, N. Fréchet, B. Thompson Collart, and Y. Dufresne **
**	DATE:		February 2025	 					                          **	
**  VERSION:	Stata 17					                                  **	
********************************************************************************

* Interpreter version
* NOTE(review): the header banner says "Stata 17" but 16.0 is requested here -- confirm which is intended.

version 16.0

* Run the per-survey do-files, in this order
* (presumably each one writes the .dta file of the same name that is loaded further down -- confirm)

foreach survey in ca2011_ipsos_exit ca2011_ipsos_invitation ca2015_ipsos_exit ///
	ca2015_lpp ca2019_ces_phone ca2019_ces_web on2011_ipsos_exit ///
	on2011_ipsos_invitation on2014_ipsos_exit qc2022_datagochi {
	do "`survey'"
}

* Logging: close any log that is still open (errors ignored), then start "merge" afresh

capture log close
log using "merge", replace


********************************************************************************
** SELECT VARIABLES 
********************************************************************************

* 2011 CANADA GENERAL ELECTION EXIT SURVEY
* Load the survey file, keep the variables carried into the pooled file,
* give age and the survey weight their pooled names, and save.

use "ca2011_ipsos_exit.dta", clear

keep election type exit_survey responseid ///
	district_name district_code district_outcome margin enep reelected ///
	male minority r_age age55 education univ postgrad postsec income highinc province ///
	vote_choice vote_district forecast_district correct_district ///
	wtvar1 wtvar2 ///
	sex_ca2011_w age_ca2011_w education_ca2011_w income_ca2011_w province_ca2011_w ///
	ca2011_svy_weight

rename (r_age ca2011_svy_weight) (age survey_weight)

save "ca2011_ipsos_exit_merge.dta", replace

* 2011 CANADA GENERAL ELECTION INVITATION SURVEY
* Same treatment as the 2011 exit survey; this keep list has no minority variable.

use "ca2011_ipsos_invitation.dta", clear

keep election type exit_survey responseid ///
	district_name district_code district_outcome margin enep reelected ///
	male r_age age55 education univ postgrad postsec income highinc province ///
	vote_choice vote_district forecast_district correct_district ///
	wtvar1 wtvar2 ///
	sex_ca2011_w age_ca2011_w education_ca2011_w income_ca2011_w province_ca2011_w ///
	ca2011_svy_weight

rename (r_age ca2011_svy_weight) (age survey_weight)

save "ca2011_ipsos_invitation_merge.dta", replace

* 2015 CANADA GENERAL ELECTION EXIT POLL
* Keep the pooled-file variables; the *_unif versions of education and income
* take over the plain names.

use "ca2015_ipsos_exit.dta", clear

keep election type responseid probability ///
	district_name district_code district_outcome margin enep boundary ///
	male r_age age55 education_unif univ postgrad postsec income_unif highinc province ///
	vote_choice vote_district forecast_district correct_district time ///
	wtvar1 wtvar2 ///
	sex_ca2015_w age_ca2015_w education_ca2015_w income_ca2015_w province_ca2015_w ///
	ca2015_svy_weight

rename (r_age education_unif income_unif ca2015_svy_weight) ///
	(age education income survey_weight)

save "ca2015_ipsos_exit_merge.dta", replace

* 2015 LOCAL PARLIAMENT PROJECT CANADIAN ELECTION SURVEY

use "ca2015_lpp.dta", clear

* Only rows flagged restricted_sample == 1 are used
keep if restricted_sample == 1

keep election type responseid probability ///
	constituencyname constituencynumber district_outcome margin enep boundary ///
	male minority age age55 education_unif univ postgrad postsec income_unif highinc ///
	interest highint prov ///
	vote_choice vote_district forecast_district correct_district_d time ///
	pid pidscale_district pidstatus_district relative ///
	weight_general_restricted ///
	sex_ca2015_w age_ca2015_w education_ca2015_w income_ca2015_w province_ca2015_w ///
	ca2015_svy_weight

* Code 7 of pid is treated as missing
recode pid (7 = .)

* Switch to the variable names used in the pooled file
rename (constituencyname constituencynumber correct_district_d prov education_unif income_unif ca2015_svy_weight) ///
	(district_name district_code correct_district province education income survey_weight)

* Min-max rescaling: <var>_n = (x - min) / (max - min), using the sample min and max
foreach scale in interest pidscale_district relative {
	summarize `scale'
	generate `scale'_n = (`scale' - `r(min)') / (`r(max)'-`r(min)')
}

save "ca2015_lpp_merge.dta", replace

* Also write a combined 2015 file (LPP rows followed by the exit-poll rows)
append using ca2015_ipsos_exit_merge
save "ca2015_merge.dta", replace

* 2019 CANADIAN ELECTION STUDY PHONE SURVEY

use "ca2019_ces_phone.dta", clear

* (pid was written twice in the earlier keep list; naming it once keeps the same variables)
keep election type responseid probability ///
	constituencyname constituencynumber district_outcome margin enep reelected ///
	male age age55 education univ postgrad postsec highinc interest highint income province ///
	vote_choice vote_district forecast_district correct_district_d time ///
	pid pidscale_district pidstatus_district relative ///
	weight_CES ///
	sex_ca2019_w age_ca2019_w education_ca2019_w income_ca2019_w province_ca2019_w ///
	ca2019_svy_weight

* Code 9 of pid is treated as missing
recode pid (9 = .)

* Switch to the variable names used in the pooled file
rename (constituencyname constituencynumber correct_district_d ca2019_svy_weight) ///
	(district_name district_code correct_district survey_weight)

* Min-max rescaling: <var>_n = (x - min) / (max - min), using the sample min and max
foreach scale in interest pidscale_district relative {
	summarize `scale'
	generate `scale'_n = (`scale' - `r(min)') / (`r(max)'-`r(min)')
}

save "ca2019_ces_phone_merge.dta", replace

* 2019 CANADIAN ELECTION STUDY INTERNET SURVEY

use "ca2019_ces_web.dta", clear

* Only rows with cps19_data_quality == 0 are used
keep if cps19_data_quality == 0

keep election type responseid probability ///
	constituencyname constituencynumber district_outcome margin enep reelected ///
	male age age55 education univ postgrad postsec income highinc interest highint province ///
	vote_choice vote_district forecast_district correct_district_d time ///
	pid pidscale_district pidstatus_district relative ///
	cps19_weight_general_restricted ///
	sex_ca2019_w age_ca2019_w education_ca2019_w income_ca2019_w province_ca2019_w ///
	ca2019_svy_weight

* Code 9 of pid is treated as missing
recode pid (9 = .)

* Switch to the variable names used in the pooled file
rename (constituencyname constituencynumber correct_district_d ca2019_svy_weight) ///
	(district_name district_code correct_district survey_weight)

* Min-max rescaling: <var>_n = (x - min) / (max - min), using the sample min and max
foreach scale in interest pidscale_district relative {
	summarize `scale'
	generate `scale'_n = (`scale' - `r(min)') / (`r(max)'-`r(min)')
}

save "ca2019_ces_web_merge.dta", replace

* Also write a combined 2019 file (web rows followed by the phone rows)
append using ca2019_ces_phone_merge
save "ca2019_merge.dta", replace

* 2011 ONTARIO GENERAL ELECTION EXIT SURVEY
* Keep the pooled-file variables (this file carries a single wtvar) and rename the weight.

use "on2011_ipsos_exit.dta", clear

keep election type exit_survey responseid ///
	district_name district_code district_outcome margin enep reelected ///
	male age age55 education univ postgrad postsec income highinc regionON ///
	vote_choice vote_district forecast_district correct_district ///
	wtvar ///
	sex_on2011_w age_on2011_w education_on2011_w income_on2011_w ///
	on2011_svy_weight

rename on2011_svy_weight survey_weight

save "on2011_ipsos_exit_merge.dta", replace

* 2011 ONTARIO GENERAL ELECTION INVITATION SURVEY
* Keep the pooled-file variables; income_unif takes over the plain income name.

use "on2011_ipsos_invitation.dta", clear

keep election type exit_survey responseid ///
	district_name district_code district_outcome margin enep reelected ///
	male age age55 education univ postgrad postsec income_unif highinc regionON ///
	vote_choice vote_district forecast_district correct_district ///
	wtvar1 wtvar2 ///
	sex_on2011_w age_on2011_w education_on2011_w income_on2011_w ///
	on2011_svy_weight

rename (income_unif on2011_svy_weight) (income survey_weight)

save "on2011_ipsos_invitation_merge.dta", replace

* Also write a combined Ontario 2011 file (invitation rows followed by the exit rows)
append using on2011_ipsos_exit_merge
save "on2011_merge.dta", replace

* 2014 ONTARIO GENERAL ELECTION EXIT SURVEY
* Keep the pooled-file variables (no postgrad or wtvar in this list) and rename the weight.

use "on2014_ipsos_exit.dta", clear

keep election type responseid ///
	district_name district_code district_outcome margin enep reelected ///
	male age age55 education univ postsec income highinc regionON ///
	vote_choice vote_district forecast_district correct_district ///
	sex_on2014_w age_on2014_w education_on2014_w income_on2014_w ///
	on2014_svy_weight

rename on2014_svy_weight survey_weight

save "on2014_ipsos_exit_merge.dta", replace

* 2022 QUEBEC GENERAL ELECTION DATAGOTCHI

use "qc2022_datagochi.dta", clear

* (days was written twice in the earlier keep list; naming it once keeps the same variables)
keep election type responseid ///
	riding days postal_code district_code district_outcome margin reelected ///
	male r_age age55 education univ postgrad postsec income highinc regionQC ///
	vote_choice vote_district forecast_district correct_district ///
	sex_qc2022_w age_qc2022_w education_qc2022_w income_qc2022_w ///
	qc2022_svy_weight

* Switch to the variable names used in the pooled file (riding is the district name, days becomes time)
rename (riding r_age days qc2022_svy_weight) (district_name age time survey_weight)

save "qc2022_datagochi_merge.dta", replace


********************************************************************************
** NEW FILE
********************************************************************************

* Stack the ten per-survey files, starting from the 2011 Canada exit survey.
* NOTE(review): -force- lets append proceed when a variable is string in one file and
* numeric in another -- confirm no variable is affected by such a mismatch.

use "ca2011_ipsos_exit_merge.dta", clear

append using ///
	ca2011_ipsos_invitation_merge ///
	ca2015_ipsos_exit_merge ///
	ca2015_lpp_merge ///
	ca2019_ces_phone_merge ///
	ca2019_ces_web_merge ///
	on2011_ipsos_exit_merge ///
	on2011_ipsos_invitation_merge ///
	on2014_ipsos_exit_merge ///
	qc2022_datagochi_merge, force

* NORMALIZE MARGIN
* Natural log of the margin; a margin of exactly 0 is assigned 0 rather than missing

gen margin_z = cond(margin == 0, 0, ln(margin))

* CREATE UNIT
* Jurisdiction of each election, derived from the election identifier

gen unit = ""

replace unit = "Canada" if inlist(election, "ca2011", "ca2015", "ca2019")
replace unit = "Ontario" if inlist(election, "on2011", "on2014")
replace unit = "Quebec" if election == "qc2022"

* CREATE AGE GROUPS
* Six brackets coded 1-6. Everywhere except on2011 age is bracketed from years;
* for on2011 the 991-996 codes are mapped onto the same 1-6 scale.

gen age_group = age

recode age_group (18/24 = 1) (25/34 = 2) (35/44 = 3) (45/54 = 4) (55/64 = 5) (65/110 = 6) if election!="on2011"
recode age_group (991 = 1) (992 = 2) (993 = 3) (994 = 4) (995 = 5) (996 = 6) if election == "on2011"

* CREATE LABELS
* Value labels for the pooled file are defined here and attached to their variables.

* Remove every value label currently in memory so the definitions below start clean
label drop _all

* Party that won the district: codes 1-5 are federal parties, 241-245 Quebec parties,
* 351-354 Ontario parties, 88 "Other"
label define outcome 1 "Conservative Party of Canada" 2 "Liberal Party of Canada" 3 "New Democratic Party of Canada" 4 "Bloc Québécois" 5 "Green Party of Canada" 241 "Coalition Avenir Quebec" 242 "Parti Quebecois" 243 "Quebec Liberal Party" 244 "Quebec solidaire" 245 "Parti conservateur du Quebec" 351 "Progressive Conservative Party of Ontario" 352 "Ontario Liberal Party" 353 "Ontario New Democratic Party" 354 "Green Party of Ontario" 88 "Other"
label values district_outcome outcome

label define boundary 0 "No boundary changes" 1 "Boundary changes"
label values boundary boundary

label define reelected 0 "Not reelected" 1 "Reelected"
label values reelected reelected

label define province 1 "British Columbia" 2 "Alberta" 3 "Saskatchewan" 4 "Manitoba" 5 "Ontario" 6 "Quebec" 7 "New Brunswick" 8 "Nova Scotia" 9 "Prince Edward Island" 10 "Newfoundland and Labrador" 11 "Northwest Territories" 12 "Yukon" 13 "Nunavut"
label values province province

label define regionON 1 "GTA 416" 2 "GTA 905" 3 "Southwestern" 4 "Central" 5 "Northern" 6 "Eastern"
label values regionON regionON

* NOTE(review): codes 9 and 10 both read "Lanaudière" -- confirm the regionQC coding
label define regionQC 1 "Abitibi-Témiscamingue" 2 "Bas-Saint-Laurent" 3 "Capitale-Nationale" 4 "Centre-du-Québec" 5 "Chaudière-Appalaches" 6 "Côte-Nord" 7 "Estrie" 8 "Gaspésie--Îles-de-la-Madeleine" 9 "Lanaudière" 10 "Lanaudière" 11 "Laurentides" 12 "Laval" 13 "Mauricie" 14 "Montréal" 15 "Montérégie" 16 "Nord-du-Québec" 17 "Outaouais" 18 "Saguenay--Lac-Saint-Jean"
label values regionQC regionQC

* Only the bracket codes 991-996 get text (the codes that the age_group recode maps
* to 1-6 for on2011); other values of age are left unlabelled
label define age 991 "18-24" 992 "25-34" 993 "35-44" 994 "45-54" 995 "55-64" 996 "65+"
label values age age

label define age55 0 "Less than 55" 1 "55 and over"
label values age55 age55

label define age_group 1 "18-24" 2 "25-34" 3 "35-44" 4 "45-54" 5 "55-64" 6 "65+"
label values age_group age_group

* The male indicator uses the label set named "gender"
label define gender 0 "Female" 1 "Male"
label values male gender

* Income brackets from several coding schemes share this one label set
* (code ranges 1-18, 771-776, 881-888, 991-993 and 1001-1023).
* NOTE(review): codes 9, 11, 12, 13, 16 and 17 are each listed more than once with
* different text -- confirm which text ends up attached and which one is intended.
label define income 1 "Under $10,000" 2 "$10,000-$14,999" 3 "$15,000-$19,999" 4 "$20,000-$24,999" 5 "$25,000-$29,999" 6 "$30,000-$34,999" 7 "$35,000-$39,999" 8 "$40,000-$44,999" 9 "$45,000-$49,999" 9 "$50,000-$54,999" 9 "$45,000-$54,999" 10 "$55,000-$59,999" 11 "$60,000-$64,999" 11 "$65,000-$69,999" 11 "$60,000-$69,999" 12 "$70,000-$74,999" 12 "$75,000-$79,999" 12 "$70,000-$79,999" 13 "$80,000-$84,999" 13 "$85,000-$89,999" 13 "$80,000-$99,999" 14 "$90,000-$94,999" 15 "$95,000-$99,999" 16 "$100,000-$119,999" 16 "$100,000-$124,999" 17 "$120,000-$149,999" 17 "$125,000-$149,999" 18 "$150,000 or more" 771 "Less than $20,000" 772 "$20,000 to $39,999" 773 "$40,000 to $59,999" 774 "$60,000 to $79,999" 775 "$80,000 to $99,999" 776 "$100,000 or more" 881 "No income" 882 "$1-$30,000" 883 "$30,001-$60,000" 884 "$60,001-$90,000" 885 "$90,001-$110,000" 886 "$110,001-$150,000" 887 "$150,001-$200,000" 888 "More than $200,000" 991 "$30,000 or less" 992 "$30,000-$59,999" 993 "$60,000 or more" 1001 "Less than $5,000" 1002 "$5,000-$9,999" 1003 "$10,000-$14,999" 1004 "$15,000-$19,999" 1005 "$20,000-$24,999" 1006 "$25,000-$29,999" 1007 "$30,000-$34,999" 1008 "$35,000-$39,999" 1009 "$40,000-$44,999" 1010 "$45,000-$49,999" 1011 "$50,000-$54,999" 1012 "$55,000-$59,999" 1013 "$60,000-$64,999" 1014 "$65,000-$69,999" 1015 "$70,000-$74,999" 1016 "$75,000-$79,999" 1017 "$80,000-$89,999" 1018 "$90,000-$99,999" 1019 "$100,000-$124,999" 1020 "$125,000-$149,999" 1021 "$150,000-199,999$" 1022 "$200,000-249,999$" 1023 "$250,000 or more"
label values income income

* Education categories from several coding schemes share this one label set
* (code ranges 1-8, 771-776, 881-887, 901-911 and 991-994)
label define education 1 "Primary school or less" 2 "Some high school" 3 "High school" 4 "Some community college/CEGEP/Trade school" 5 "Community college/CEGEP/Trade school" 6 "Some university" 7 "University undergraduate degree" 8 "University graduate degree" 771 "Some high school" 772 "High school" 773 "Community college/CEGEP/Trade school" 774 "Some university" 775 "University undergraduate degree" 776 "University graduate degree" 881 "None" 882 "Primary school" 883 "High school" 884 "College/CEGEP" 885 "Bachelor" 886 "Master" 887 "Doctorate" 991 "Less than high school" 992 "High school" 993 "Some postsecondary education" 994 "University graduate" 901 "No schooling" 902 "Some elementary school" 903 "Completed elementary school" 904 "Some secondary/high school" 905 "Completed secondary/high school" 906 "Some technical, community college, CEGEP, College Classique" 907 "Completed technical, community college, CEGEP, College Classique" 908 "Some university" 909 "Bachelor's degree" 910 "Master's degree" 911 "Professional degree or doctorate" 
label values education education

* Sentinel codes are moved to their labelled values before the labels are attached:
*   forecast_district: -99 -> 999 ("Don't know")
*   vote_choice:       -99 -> 777 ("Don't know"); 0 and 99 -> 999 ("Will not vote/Spoiled ballot")
recode forecast_district (-99 = 999)
recode vote_choice (-99 = 777) (0 99 = 999)

* Label sets for the 0/1 indicators
label define univ        0 "No university degree"    1 "University degree"
label define postgrad    0 "No postgraduate degree"  1 "Postgraduate degree"
label define postsec     0 "No postsecondary degree" 1 "Postsecondary degree"
label define highinc     0 "Other income categories" 1 "Highest incomes"
label define winner      0 "Voted for loser"         1 "Voted for winner"
label define probability 0 "Qualitative"             1 "Probability"
label define correct     0 "Incorrect"               1 "Correct"

* Party label sets for the forecast and for the respondent's own vote.
* Both add code 6 (People's Party of Canada) to the parties used for district outcomes.
label define forecast ///
	1 "Conservative Party of Canada" 2 "Liberal Party of Canada" 3 "New Democratic Party of Canada" ///
	4 "Bloc Québécois" 5 "Green Party of Canada" 6 "People's Party of Canada" ///
	241 "Coalition Avenir Quebec" 242 "Parti Quebecois" 243 "Quebec Liberal Party" ///
	244 "Quebec solidaire" 245 "Parti conservateur du Quebec" ///
	351 "Progressive Conservative Party of Ontario" 352 "Ontario Liberal Party" ///
	353 "Ontario New Democratic Party" 354 "Green Party of Ontario" ///
	88 "Other" 99 "Ambiguous" 999 "Don't know"

label define vote_choice ///
	1 "Conservative Party of Canada" 2 "Liberal Party of Canada" 3 "New Democratic Party of Canada" ///
	4 "Bloc Québécois" 5 "Green Party of Canada" 6 "People's Party of Canada" ///
	241 "Coalition Avenir Quebec" 242 "Parti Quebecois" 243 "Quebec Liberal Party" ///
	244 "Quebec solidaire" 245 "Parti conservateur du Quebec" ///
	351 "Progressive Conservative Party of Ontario" 352 "Ontario Liberal Party" ///
	353 "Ontario New Democratic Party" 354 "Green Party of Ontario" ///
	88 "Other" 999 "Will not vote/Spoiled ballot" 777 "Don't know"

* Attach each label set to its variable
label values univ univ
label values postgrad postgrad
label values postsec postsec
label values highinc highinc
label values vote_district winner
label values probability probability
label values forecast_district forecast
label values vote_choice vote_choice
label values correct_district correct

* DROP OBSERVATIONS WITH MISSING DISTRICT INFORMATION AND MISSING FORECASTS

* A district code is unusable when it is missing (.) or is one of the codes ending in 999 below.
* NOTE(review): 10999 appeared twice in the earlier list of drops; if the second entry
* was meant to be a different code, add it here.
drop if district_code == . | ///
	inlist(district_code, 10999, 11999, 12999, 13999, 24999, 35999, ///
	46999, 47999, 48999, 59999, 99999)

* Respondents who gave no district forecast
drop if forecast_district == .

* CHECK FOR PLURALITY
* Compares the most frequent vote choice among respondents of each district-election
* with the recorded district outcome.

* Respondents without a vote choice are removed
drop if vote_choice==.

* num: number of respondents in the same district-election who made the same vote choice
bysort district_code election vote_choice: gen num=_N
* vote_choice_mode: vote choice of the last row once each district-election is ordered by
* num, i.e. a choice with the largest count.
* NOTE(review): when two choices share the largest count, the one picked depends on how the
* tied rows happen to be ordered -- confirm whether a fixed tie-break is needed.
bysort district_code election(num): gen vote_choice_mode=vote_choice[_N]

* plurality: 1 when that modal choice equals district_outcome, 0 otherwise
gen plurality = 1 if vote_choice_mode==district_outcome
replace plurality = 0 if vote_choice_mode!=district_outcome

* Keep one row per district-election-vote choice: dup is 0 for a group of one row and
* the running row number otherwise, so rows numbered 2 and up are dropped.
* NOTE(review): this removes respondents from the data in memory for good; everything
* computed and saved further down uses only the rows that remain -- confirm intended.
sort district_code election vote_choice
quietly by district_code election vote_choice: gen dup = cond(_N==1,0,_n)
drop if dup > 1

* dup1: running row number among rows of a district-election that share the same num
* (0 when the count is unique) ...
sort district_code election num
quietly by district_code election num: gen dup1 = cond(_N==1,0,_n)

* ... and reset to 0 on rows whose vote choice is not the modal one
replace dup1 = 0 if vote_choice_mode!=vote_choice

* GENERATE MEAN VALUES FOR CHOSEN VARIABLES BY DISTRICT
* Each mean_* variable is the average of its source variable within district-election.

* vote_district is stored under the shorter name mean_vote
bysort district_code election: egen mean_vote = mean(vote_district)

* The remaining means take the name of their source variable
foreach source in postsec age55 male highinc time {
	bysort district_code election: egen mean_`source' = mean(`source')
}

* GENERATE PROPORTION OF VALUE FOR LOSER/WINNER STATUS

* pidstatus_district2: 1 when pidstatus_district is 3 (the category the regression table
* labels "Winner PID"), 0 for every other value that is not system missing (.)
gen pidstatus_district2 = 1 if pidstatus_district == 3
replace pidstatus_district2 = 0 if pidstatus_district!=3 & pidstatus_district!=.

* District-election shares of winner voters, winner identifiers and correct forecasts
* (vote_district_prop uses the same formula as mean_vote)
bysort district_code election: egen vote_district_prop = mean(vote_district)
bysort district_code election: egen pidstatus_district_prop = mean(pidstatus_district2)
bysort district_code election: egen correct_district_prop = mean(correct_district)

* Keep one row per district-election.
* The plurality check leaves a helper variable called dup in the dataset, and -generate-
* stops with r(110) "variable dup already defined" if it is asked to create it again,
* so the old helper is removed first (capture: no error if it is already gone).
* NOTE(review): after this drop the data, and therefore the merge.dta saved further down
* and used for the individual-level models, holds a single row per district-election --
* confirm that this is the intended estimation sample.
capture drop dup
sort district_code election
quietly by district_code election: gen dup = cond(_N==1,0,_n)
drop if dup > 1

* GENERATE PROPORTION OF VALUE CATEGORIES FOR CHOSEN VARIABLES BY DISTRICT
* For each variable below: share of the district-election's rows that fall in the row's own
* category. The numerator counts rows in the same district-election-category; the
* denominator is the number of rows in the district-election. Rows where the variable
* is system missing (.) get no value.

* Source variables and, in the same position, the stub used for the <stub>_prop name
local sources education age_group income male vote_choice pid time interest
local stubs   education age       income male choice      pid time interest

local n_vars : word count `sources'
forvalues i = 1/`n_vars' {
	local src  : word `i' of `sources'
	local stub : word `i' of `stubs'
	bysort district_code election `src': gen `stub'_prop = _N if `src'!=.
	by district_code election: replace `stub'_prop = `stub'_prop/_N if `src'!=.
}

* One file per election (savesome is a user-written command, not part of official Stata)
foreach e in ca2011 ca2015 ca2019 on2011 on2014 qc2022 {
	savesome if election == "`e'" using `e'_diversity.dta, replace
}

* FSA
* Same category shares as for districts, but grouped by postal_code instead of
* district_code; only the qc2022 rows are written out.

* Source variables and, in the same position, the stub used for the <stub>_fsa_prop name
local sources education age_group income male vote_choice time
local stubs   education age       income male choice      time

local n_vars : word count `sources'
forvalues i = 1/`n_vars' {
	local src  : word `i' of `sources'
	local stub : word `i' of `stubs'
	bysort postal_code election `src': gen `stub'_fsa_prop = _N if `src'!=.
	by postal_code election: replace `stub'_fsa_prop = `stub'_fsa_prop/_N if `src'!=.
}

savesome if election == "qc2022" using qc2022_fsa_diversity.dta, replace

* SAVE NEW DATASET

save "merge.dta", replace

* GENERATE PROPORTION OF CORRECT FORECAST
* Frequency table of correct_district for each election, then for all elections together

foreach e in ca2011 ca2015 ca2019 on2011 on2014 qc2022 {
	tab correct_district if election == "`e'"
}
tab correct_district


********************************************************************************
** INDIVIDUAL-LEVEL ANALYSES
********************************************************************************

use "merge.dta", clear

* Eight mixed-effects logit models of correct_district with districts as the grouping
* level, weighted by survey_weight. Every model has a random intercept plus random
* slopes on its interaction term. The specifications are held in locals and fitted
* in one loop, in this order:
*   I1   Canada 2011        I2_1 Canada 2015 #1    I2_2 Canada 2015 #2
*   I3_1 Canada 2019 #1     I3_2 Canada 2019 #2    I4   Ontario 2011
*   I5   Ontario 2014       I6   Quebec 2022

* Building blocks shared by the specifications
local vote_x_pse "i.vote_district##i.postsec"
local pid_x_int  "i.pidstatus_district##c.interest_n"
local controls   "i.male i.age55 i.highinc margin_z"

* Fixed part (rhs_*), random-slope terms (re_*) and election (el_*) of each model
local rhs_I1   "`vote_x_pse' `controls' i.reelected"
local re_I1    "`vote_x_pse'"
local el_I1    "ca2011"

local rhs_I2_1 "`vote_x_pse' `controls' i.boundary time"
local re_I2_1  "`vote_x_pse'"
local el_I2_1  "ca2015"

local rhs_I2_2 "`pid_x_int' i.postsec interest_n `controls' i.boundary time"
local re_I2_2  "`pid_x_int'"
local el_I2_2  "ca2015"

local rhs_I3_1 "`vote_x_pse' `controls' i.reelected time"
local re_I3_1  "`vote_x_pse'"
local el_I3_1  "ca2019"

local rhs_I3_2 "`pid_x_int' i.postsec interest_n `controls' i.reelected time"
local re_I3_2  "`pid_x_int'"
local el_I3_2  "ca2019"

local rhs_I4   "`vote_x_pse' `controls' i.reelected"
local re_I4    "`vote_x_pse'"
local el_I4    "on2011"

local rhs_I5   "`vote_x_pse' `controls' i.reelected"
local re_I5    "`vote_x_pse'"
local el_I5    "on2014"

local rhs_I6   "`vote_x_pse' `controls' i.reelected time"
local re_I6    "`vote_x_pse'"
local el_I6    "qc2022"

foreach m in I1 I2_1 I2_2 I3_1 I3_2 I4 I5 I6 {

	eststo `m': melogit correct_district `rhs_`m'' [pweight=survey_weight] if election == "`el_`m''" || district_code: `re_`m''

	* Number of districts: first cell of e(N_g), stored as the macro "groups"
	matrix N_g = e(N_g)
	local n_districts = N_g[1,1]
	estadd local groups `n_districts'

	* Intraclass correlation as returned in r(icc2)
	estat icc
	estadd scalar icc2 = r(icc2)

	* Information criteria: columns 5 and 6 of the first row of r(S)
	estat ic
	estadd scalar AIC = el(r(S),1,5)
	estadd scalar BIC = el(r(S),1,6)
}

* REGRESSION TABLE
* Writes the eight stored models to individual_level.tex. Columns: the six
* vote-based models first, then the two party-identification models.

* Blank out the dependent-variable label
label variable correct_district " "

* The command is one logical line. Every physical line except the last must end in
* /// -- without it Stata runs the remainder (from eqlab() on) as a separate command
* and stops with an "unrecognized command" error.
esttab I1 I2_1 I3_1 I4 I5 I6 I2_2 I3_2 using "individual_level.tex", ///
	wide nonumbers ///
	drop(0.vote_district 1.pidstatus_district 0.postsec 0.male 0.age55 0.highinc ///
		0.reelected 0.boundary 0.vote_district#0.postsec 0.vote_district#1.postsec ///
		1.vote_district#0.postsec 1.pidstatus_district#c.interest_n) ///
	refcat(1.vote_district "\textbf{Partisan preference}" ///
		1.male "\textbf{Sociodemographics}" ///
		margin_z "\textbf{Task difficulty}" ///
		1.vote_district#1.postsec "\textbf{Interaction}" ///
		1.postsec "\textbf{Sophistication}" ///
		ll "Model fit statistics", nolabel) ///
	not width(\hsize) b(2) star(+ 0.10 * 0.05 ** 0.01 *** 0.001) label ///
	mtitles("CA 2011" "CA 2015" "CA 2019" "ON 2011" "ON 2014" "QC 2022" "CA 2015" "CA 2019") ///
	interaction("$\times$") style(tex) replace booktabs ///
	varlabels(_cons "\textbf{Constant}" ///
		1.vote_district "\hspace{1em} Voted for winner" ///
		pidscale_district "\hspace{1em} Strong winner PID" ///
		1.postsec "\hspace{1em} Postsecondary" ///
		1.male "\hspace{1em} Male" ///
		1.age55 "\hspace{1em} 55 years and over" ///
		1.highinc "\hspace{1em} High income" ///
		margin_z "\hspace{1em} Margin of victory" ///
		1.boundary "\hspace{1em} Boundary changes" ///
		1.reelected "\hspace{1em} Reelected" ///
		time "\hspace{1em} Response date" ///
		2.pidstatus_district "\hspace{1em} No PID" ///
		3.pidstatus_district "\hspace{1em} Winner PID" ///
		2.pidstatus_district#1.postsec "\hspace{1em} No PID $\times$ PSE" ///
		3.pidstatus_district#1.postsec "\hspace{1em} Winner PID $\times$ PSE" ///
		1.vote_district#1.postsec "\hspace{1em} Vote $\times$ PSE" ///
		interest_n "\hspace{1em} High interest" ///
		2.pidstatus_district#c.interest_n "\hspace{1em} No PID $\times$ Interest" ///
		3.pidstatus_district#c.interest_n "\hspace{1em} Winner PID $\times$ Interest") ///
	title("Citizens' forecasts for district-level elections, individual level."\label{ind}) ///
	order(1.vote_district 2.pidstatus_district 3.pidstatus_district 1.postsec interest_n ///
		1.male 1.age55 1.highinc 1.vote_district#1.postsec ///
		2.pidstatus_district#c.interest_n 3.pidstatus_district#c.interest_n ///
		margin_z 1.reelected 1.boundary time _cons) ///
	eqlab("" "") scalars(ll aic bic) ///
	stats(N groups icc2 ll AIC BIC, ///
		labels("Observations" "Districts" "ICC" "Log likelihood" "AIC" "BIC") ///
		fmt(%9.0fc %9.0fc %9.2fc %9.2f %9.2f %9.2f)) ///
	nonotes

* Release the stored estimates
eststo clear

* Inactive alternative table layout (two-way and three-way interaction rows, raw margin).
* It is enclosed in a block comment so that no part of it is executed; previously its
* tail sat on a line of its own without a comment marker.
/*
esttab I1 I2_1 I3_1 I4 I5 I6 I2_2 I3_2 using "individual_level.tex", ///
	wide nonumbers ///
	drop(0.vote_district 1.pidstatus_district 0.postsec 0.male 0.age55 0.highinc ///
		0.reelected 0.boundary 0.vote_district#0.postsec 0.vote_district#1.postsec ///
		1.vote_district#0.postsec 1.pidstatus_district#0.postsec ///
		2.pidstatus_district#0.postsec 3.pidstatus_district#0.postsec ///
		1.pidstatus_district#1.postsec 0.postsec#c.interest_n ///
		1.pidstatus_district#c.interest_n 1.pidstatus_district#0.postsec#c.interest_n ///
		1.pidstatus_district#1.postsec#c.interest_n ///
		2.pidstatus_district#0.postsec#c.interest_n ///
		3.pidstatus_district#0.postsec#c.interest_n) ///
	refcat(1.vote_district "\textbf{Partisan preference}" ///
		1.postsec "\textbf{Sociodemographics}" ///
		margin "\textbf{Task difficulty}" ///
		1.vote_district#1.postsec "\textbf{Two-way interactions}" ///
		2.pidstatus_district#1.postsec#c.interest_n "\textbf{Three-way interactions}" ///
		interest_n "\textbf{Political interest}" ///
		ll "Model fit statistics", nolabel) ///
	not width(\hsize) b(2) star(+ 0.10 * 0.05 ** 0.01 *** 0.001) label ///
	mtitles("CA 2011" "CA 2015" "CA 2019" "ON 2011" "ON 2014" "QC 2022" "CA 2015" "CA 2019") ///
	interaction("$\times$") style(tex) replace booktabs ///
	varlabels(_cons "\textbf{Constant}" ///
		1.vote_district "\hspace{1em} Voted for winner" ///
		pidscale_district "\hspace{1em} Strong winner PID" ///
		1.postsec "\hspace{1em} Postsecondary" ///
		1.male "\hspace{1em} Male" ///
		1.age55 "\hspace{1em} 55 years and over" ///
		1.highinc "\hspace{1em} High income" ///
		margin "\hspace{1em} Margin of victory" ///
		1.boundary "\hspace{1em} Boundary changes" ///
		1.reelected "\hspace{1em} Reelected" ///
		time "\hspace{1em} Response date" ///
		2.pidstatus_district "\hspace{1em} No PID" ///
		3.pidstatus_district "\hspace{1em} Winner PID" ///
		2.pidstatus_district#1.postsec "\hspace{1em} No PID $\times$ PSE" ///
		3.pidstatus_district#1.postsec "\hspace{1em} Winner PID $\times$ PSE" ///
		1.vote_district#1.postsec "\hspace{1em} Vote $\times$ PSE" ///
		1.postsec#2.pidstatus_district "\hspace{1em} No PID $\times$ PSE" ///
		1.postsec#3.pidstatus_district "\hspace{1em} Winner PID $\times$ PSE" ///
		interest_n "\hspace{1em} High interest" ///
		1.postsec#c.interest_n "\hspace{1em} PSE $\times$ Interest" ///
		2.pidstatus_district#c.interest_n "\hspace{1em} No PID $\times$ Interest" ///
		3.pidstatus_district#c.interest_n "\hspace{1em} Winner PID $\times$ Interest" ///
		2.pidstatus_district#1.postsec#c.interest_n "\hspace{1em} No PID $\times$ Postsec $\times$ Interest" ///
		3.pidstatus_district#1.postsec#c.interest_n "\hspace{1em} Winner PID $\times$ Postsec $\times$ Interest") ///
	title("Citizens' forecasts for district-level elections, individual level."\label{ind}) ///
	order(1.vote_district 2.pidstatus_district 3.pidstatus_district 1.postsec ///
		1.male 1.age55 1.highinc interest_n 1.vote_district#1.postsec ///
		2.pidstatus_district#1.postsec 3.pidstatus_district#1.postsec ///
		1.postsec#c.interest_n 2.pidstatus_district#c.interest_n ///
		3.pidstatus_district#c.interest_n 2.pidstatus_district#1.postsec#c.interest_n ///
		3.pidstatus_district#1.postsec#c.interest_n margin 1.reelected 1.boundary time _cons) ///
	eqlab("" "") scalars(ll aic bic) ///
	stats(N groups icc2 ll AIC BIC, ///
		labels("Observations" "Districts" "ICC" "Log likelihood" "AIC" "BIC") ///
		fmt(%9.0fc %9.0fc %9.2fc %9.2f %9.2f %9.2f)) ///
	nonotes
*/


********************************************************************************
** DIFFERENCE BETWEEN WINNERS AND LOSERS
********************************************************************************

use "merge.dta", clear

* For each election: fit a random-intercept logit of correct_district, obtain the
* predicted probability for losers (vote_district = 0) and winners (vote_district = 1)
* with the other covariates at their means, and test the winner-loser difference.
* NOTE(review): these models use the raw margin and a random intercept only, whereas the
* models behind the regression table use margin_z and random slopes -- confirm deliberate.

* Election-specific controls added to the common specification
local extra_ca2011 "i.reelected"
local extra_ca2015 "i.boundary time"
local extra_ca2019 "i.reelected time"
local extra_on2011 "i.reelected"
local extra_on2014 "i.reelected"
local extra_qc2022 "i.reelected time"

* Order: Canada 2011, 2015, 2019; Ontario 2011, 2014; Quebec 2022
foreach e in ca2011 ca2015 ca2019 on2011 on2014 qc2022 {

	melogit correct_district i.vote_district##i.postsec i.male i.age55 i.highinc margin `extra_`e'' [pweight=survey_weight] if election == "`e'" || district_code:
	margins, at(vote_district=(0 1)) atmeans post

	* Start a fresh mlincom table, then add the two predictions and their difference
	mlincom, clear

	qui mlincom 1, stat(all) add rowname("Pr(Correct Forecast):Losers")
	qui mlincom 2, stat(all) add rowname("Pr(Correct Forecast):Winners")

	mlincom 2 - 1, stat(all) add rowname("Winners - Losers")
}


********************************************************************************
** INTERACTION (CANADA 2011, 2015, 2019; ONTARIO 2011, 2014; QUEBEC 2022)
********************************************************************************

* The same sequence is run for each election, in this order:
*   1. fit the vote status x education model and store it as D<election>
*   2. predictions by vote status for respondents without postsecondary
*      education (stored as D<election>L) and with it (D<election>W)
*   3. bar plots of those predictions with coefplot
*   4. table of predictions, education effects and their second difference
* Controls differ by election: ca2015 uses i.boundary and time, ca2019 and
* qc2022 use i.reelected and time, the remaining elections use i.reelected.

use "merge.dta", clear

foreach e in ca2011 ca2015 ca2019 on2011 on2014 qc2022 {

	local controls i.reelected
	if "`e'" == "ca2015" local controls i.boundary time
	if inlist("`e'", "ca2019", "qc2022") local controls i.reelected time

	melogit correct_district i.vote_district##i.postsec i.male i.age55 i.highinc margin `controls' [pweight=survey_weight] if election == "`e'" || district_code:
	est store D`e'

	* Plot of predictions: separate predictions for no postsecondary and
	* postsecondary, each stored for use with coefplot
	margins if postsec == 0, at(vote_district=(0 1)) atmeans post
	est store D`e'L

	est restore D`e'
	margins if postsec == 1, at(vote_district=(0 1)) atmeans post
	est store D`e'W

	* Basic plot
	coefplot D`e'L D`e'W, vertical recast(bar) barw(0.3)

	* Plot with added options
	coefplot (D`e'L, col(gs8)) (D`e'W, col(gs12)), ///
		vertical recast(bar) barw(0.3) level(95) ///
		ciopts(recast(rcap) color(black)) citop ///
		legend(order(1 "No Postsecondary" 3 "Postsecondary")) ///
		xlab(1 "Losers" 2 "Winners") ///
		ytitle("Pr(Correct Forecast)") ylab(0(.2)1) 

	* Predictions, AMEs, and 2nd difference tests
	est restore D`e'
	margins, over(postsec) at(vote_district=(0 1)) atmeans post
	mlincom, clear

	* Not strictly necessary: the four margins rows are repeated in the same
	* table as the AMEs and the 2nd difference test, with clearer labels
	quietly {
		mlincom 1, stat(all) add rowname("Pr(Correct Forecast):Losers, No Postsecondary")
		mlincom 2, stat(all) add rowname("Pr(Correct Forecast):Losers, Postsecondary")
		mlincom 3, stat(all) add rowname("Pr(Correct Forecast):Winners, No Postsecondary")
		mlincom 4, stat(all) add rowname("Pr(Correct Forecast):Winners, Postsecondary")
	}

	* Effect of education for losers
	mlincom 2 - 1, stat(all) add rowname("AME Education: Losers")
	* Effect of education for winners
	mlincom 4 - 3, stat(all) add rowname("AME Education: Winners")
	* 2nd differences
	qui mlincom (2 - 1) - (4 - 3), stat(all) add rowname("2nd Diff") 

	* Final table
	mlincom, twidth(20) title("Pr(Correct Forecast) - Interaction Effects of Vote Status x Education")
}


********************************************************************************
** STORE ESTIMATES FROM INTERACTION
********************************************************************************

* For each election: reload the merged data, refit the interaction model,
* post the predictions by vote status and education, export them with
* parmest to <election>_district_parmest.dta, and tag every exported row with
* the election id, the vote status (comp) and the education group (postsec).
* Controls differ by election: ca2015 uses i.boundary and time, ca2019 and
* qc2022 use i.reelected and time, the remaining elections use i.reelected.

foreach e in ca2011 ca2015 ca2019 on2011 on2014 qc2022 {

	local controls i.reelected
	if "`e'" == "ca2015" local controls i.boundary time
	if inlist("`e'", "ca2019", "qc2022") local controls i.reelected time

	use "merge.dta", clear

	melogit correct_district i.vote_district##i.postsec i.male i.age55 i.highinc margin `controls' [pweight=survey_weight] if election == "`e'" || district_code:
	margins, over(postsec) at(vote_district=(0 1)) atmeans post
	parmest, label saving(`e'_district_parmest.dta, replace)

	* parm identifies each margin: 1._at / 2._at are the two vote_district
	* values (0 and 1), 0.postsec / 1.postsec the education group
	use "`e'_district_parmest.dta", clear
	gen electionid = "`e'"
	gen comp = "losers" if inlist(parm, "1._at#0.postsec", "1._at#1.postsec")
	replace comp = "winners" if inlist(parm, "2._at#0.postsec", "2._at#1.postsec")
	gen postsec = "postsec" if inlist(parm, "1._at#1.postsec", "2._at#1.postsec")
	replace postsec = "nopostsec" if inlist(parm, "1._at#0.postsec", "2._at#0.postsec")
	save "`e'_district_parmest.dta", replace
}

* MERGE ESTIMATES FROM ALL ELECTIONS

* Stack the six election-specific parmest files created above into a single
* file. The Canada 2019 file was previously built but never appended, so
* district_parmest.dta contained no ca2019 rows.

use "ca2011_district_parmest.dta", clear
append using "ca2015_district_parmest.dta"
append using "ca2019_district_parmest.dta"
append using "on2011_district_parmest.dta"
append using "on2014_district_parmest.dta"
append using "qc2022_district_parmest.dta"
save "district_parmest.dta", replace


********************************************************************************
** PREDICTED PROBABILITIES (ALL ELECTIONS)
********************************************************************************

* For each election, model #1 (vote status x education) is fitted and stored
* as C<election>; predictions at covariate means are then reported over each
* focal covariate in turn. For ca2015 and ca2019 a model #2, built on
* pidstatus_district x interest_n x postsec, is fitted afterwards and stored
* as C<election>_2, with interest_n added to the list of focal covariates.
* The stored model is restored before every margins call because each call
* posts its results over the active estimates.

use "merge.dta", clear

foreach e in ca2011 ca2015 ca2019 on2011 on2014 qc2022 {

	* ca2015 uses boundary where the other elections use reelected;
	* time enters the ca2015, ca2019 and qc2022 models only
	local seat reelected
	if "`e'" == "ca2015" local seat boundary
	local trend
	if inlist("`e'", "ca2015", "ca2019", "qc2022") local trend time

	* Model #1
	melogit correct_district i.vote_district##i.postsec i.male i.age55 i.highinc margin i.`seat' `trend' [pweight=survey_weight] if election == "`e'" || district_code:
	est store C`e'

	foreach at in "vote_district=(0 1)" "postsec=(0 1)" "margin=(0 10)" "`seat'=(0 1)" {
		est restore C`e'
		margins, at(`at') atmeans post
	}

	* Model #2 (Canada 2015 and Canada 2019 only)
	if inlist("`e'", "ca2015", "ca2019") {

		melogit correct_district i.pidstatus_district##c.interest_n##i.postsec interest_n i.male i.age55 i.highinc margin i.`seat' `trend' [pweight=survey_weight] if election == "`e'" || district_code:
		est store C`e'_2

		foreach at in "pidstatus_district=(1 2 3)" "postsec=(0 1)" "margin=(0 10)" "`seat'=(0 1)" "interest_n=(0 1)" {
			est restore C`e'_2
			margins, at(`at') atmeans post
		}
	}
}


********************************************************************************
** DISTRICT FORECASTS
********************************************************************************

* Reduce the respondent-level file to one row per district-election holding
* the share of observations giving each forecast, the plurality forecast,
* its numeric code, and the number of observations behind it.

use "merge.dta", clear

drop if district_code == .

* One indicator per forecast category; f1-f18 follow the order in which
* tabulate lists the categories and are renamed accordingly
tabulate forecast_district, generate(f)

local k = 0
foreach p in cpc lpc ndp bq gpc ppc oth amb caq pq plq qs pcq opc olp ondp gpo dk {
	local ++k
	rename f`k' `p'
}

* Share of each forecast within district-election. dk is listed first so
* that it is also the first share examined in the plurality search below.
local xvars
foreach p in dk cpc lpc ndp bq gpc ppc oth amb caq pq plq qs pcq opc olp ondp gpo {
	bysort district_code election: egen u_`p' = mean(`p')
	local xvars `xvars' u_`p'
}

bysort district_code election: egen u_correct = mean(correct_district)

* Keep a single row per district-election; dup + 1 is the number of
* observations that shared the district-election before dropping
duplicates tag election district_code, gen(dup)
duplicates drop election district_code, force
gen size = ln(dup + 1)
gen size_n = dup + 1

* Plurality forecast: the strict inequality means that, among tied shares,
* the one examined first is retained
gen which_max = "" 
gen max = 0 

quietly {
	foreach x of local xvars {
		replace which_max = "`x'" if `x' > max
		replace max = `x' if `x' > max
	}
}
* Strip the "u_" prefix
replace which_max = substr(which_max, 3, .)

foreach e in ca2011 ca2015 ca2019 on2011 on2014 qc2022 {
	tab which_max if election == "`e'"
}

* Numeric code of the plurality forecast (names and codes are parallel lists)
gen group_forecast = .

local names dk cpc lpc ndp bq gpc ppc oth amb caq pq plq qs pcq opc olp ondp gpo
local codes -99 1 2 3 4 5 6 88 99 241 242 243 244 245 351 352 353 354
local n : word count `names'

forvalues i = 1/`n' {
	local p : word `i' of `names'
	local c : word `i' of `codes'
	replace group_forecast = `c' if which_max == "`p'"
}

* Sample size categories: each threshold reached moves the district up one
* category (1 = under 25, 2 = 25-49, 3 = 50-99, 4 = 100 or more)

gen size_cat = 1 + (size_n >= 25) + (size_n >= 50) + (size_n >= 100)

label define size_cat 1 "< 25" 2 "25-49" 3 "50-99" 4 "100+"
label values size_cat size_cat

foreach e in ca2011 ca2015 ca2019 on2011 on2014 qc2022 {
	tab size_cat if election == "`e'"
}


********************************************************************************
** PERCENTAGE OF CORRECT DISTRICT FORECASTS
********************************************************************************

* 1 when the plurality forecast equals the district outcome, 0 otherwise
gen correct_group = (group_forecast == district_outcome)

foreach e in ca2011 ca2015 ca2019 on2011 on2014 qc2022 {
	tab correct_group if election == "`e'"
}
tab correct_group

* IDENTIFY EQUAL PLURALITY FORECASTS (CANADA, ONTARIO, QUEBEC)
*
* A district has an equal plurality when at least two of the forecast shares
* relevant to its unit are equal to the top share (max). Counting the shares
* that equal max replaces the hand-written list of pairwise comparisons,
* which contained two mistyped conditions: "u_dk==ndp" (Canada, dk vs ndp)
* and "u_dk==oth" (Ontario, ondp vs oth) compared a share with a raw 0/1
* indicator instead of testing the second share against max, so those ties
* were not flagged. The dk/oth pairs also tested "u_dk!=0" rather than
* "u_dk==max"; the two agree whenever the top share is positive.

local parties_Canada  dk cpc lpc ndp bq gpc ppc oth amb
local parties_Ontario dk opc olp ondp gpo oth amb
local parties_Quebec  dk caq pq plq qs pcq oth amb

gen equality = .

foreach u in Canada Ontario Quebec {

	* Number of this unit's shares that are tied at the top
	tempvar n_top
	gen byte `n_top' = 0
	foreach p of local parties_`u' {
		replace `n_top' = `n_top' + (u_`p' == max)
	}

	replace equality = 1 if unit == "`u'" & `n_top' >= 2

	* Dropped explicitly so that it cannot end up in the saved file
	drop `n_top'
}

* REPLACE EQUALITY BY INCORRECT FORECAST

replace correct_group = 0 if correct_group == 1 & equality == 1

save "aggregate.dta", replace


********************************************************************************
** DISTRICT FORECASTS: FSA
********************************************************************************

use "merge.dta", clear

* Keep Quebec 2022 respondents who reported a postal code. The election filter
* is applied before the forecast dummies are built so that f1-f5 can only
* correspond to forecast codes that occur in the Quebec 2022 data.
keep if election == "qc2022"
drop if postal_code == ""

* One 0/1 indicator per forecast category, numbered in ascending order of the
* forecast_district codes.
* NOTE(review): the renames assume the five lowest codes are CAQ, PQ, PLQ, QS
* and PCQ, in that order -- confirm against the codebook.
tabulate forecast_district, generate(f)

rename f1 caq
rename f2 pq 
rename f3 plq 
rename f4 qs 
rename f5 pcq

* Share of respondents in each postal code forecasting each party
bysort postal_code: egen u_caq = mean(caq)
bysort postal_code: egen u_pq = mean(pq)
bysort postal_code: egen u_plq = mean(plq)
bysort postal_code: egen u_qs = mean(qs)
bysort postal_code: egen u_pcq = mean(pcq)

* Share of respondents whose own district forecast was correct
bysort postal_code: egen u_correct = mean(correct_district)

* Collapse to one row per postal code. dup counts the other respondents sharing
* the postal code, so dup + 1 is the group size and size is its natural log.
duplicates tag postal_code, gen(dup)
duplicates drop postal_code, force
gen size = ln(dup + 1)

* Find the party with the largest share. The comparison is strict, so the first
* party in the list keeps the title when shares are tied. Missing shares are
* skipped explicitly: Stata ranks a missing value above every number, so an
* unguarded comparison would select a party for postal codes that have no valid
* forecast and would set max to missing.
unab xvars: u_caq u_pq u_plq u_qs u_pcq
gen which_max = "" 
gen max = 0 

quietly foreach x of local xvars { 
    replace which_max = "`x'" if `x' > max & !missing(`x')
    replace max = `x' if `x' > max & !missing(`x')
}

* Strip the leading "u_" so that which_max holds the bare party name
replace which_max = substr(which_max, 3, .)

tab which_max if election == "qc2022"

* Numeric party code of the plurality forecast; stays missing when no party has
* a positive share.
gen group_forecast = .

replace group_forecast = 241 if which_max == "caq"
replace group_forecast = 242 if which_max == "pq"
replace group_forecast = 243 if which_max == "plq"
replace group_forecast = 244 if which_max == "qs"
replace group_forecast = 245 if which_max == "pcq"

* Sample size categories
*
* The cut-offs (25, 50 and 100 respondents) are applied to the raw group size,
* dup + 1. The variable size is the natural log of that count, so comparing it
* with these thresholds would place every postal code in the first category.

gen size_cat = .

replace size_cat = 1 if dup + 1 < 25
replace size_cat = 2 if inrange(dup + 1, 25, 49)
replace size_cat = 3 if inrange(dup + 1, 50, 99)
replace size_cat = 4 if dup + 1 >= 100 & !missing(dup)

label define size_cat 1 "< 25" 2 "25-49" 3 "50-99" 4 "100+"
label values size_cat size_cat

tab size_cat if election == "qc2022"


********************************************************************************
** PERCENTAGE OF CORRECT DISTRICT FORECASTS: FSA
********************************************************************************

* 1 when the plurality forecast matches the district outcome, 0 otherwise
gen correct_group = group_forecast == district_outcome

tab correct_group if election == "qc2022"

* IDENTIFY EQUAL PLURALITY FORECASTS (QUEBEC)
*
* A tie exists when at least two of the five party shares equal max, which is
* the same condition as any pair of shares being equal to each other and to max.
* NOTE(review): unit is not created in this section; it is presumably carried
* over from merge.dta -- confirm.

gen equality = .

replace equality = 1 if unit == "Quebec" & ///
    ((u_caq == max) + (u_pq == max) + (u_plq == max) + (u_qs == max) + (u_pcq == max)) >= 2

* TIED PLURALITY FORECASTS COUNT AS INCORRECT

replace correct_group = 0 if equality == 1

save "aggregate_fsa.dta", replace


********************************************************************************
** GROUP-LEVEL ANALYSES
********************************************************************************

use "aggregate.dta", clear

* Drop units for which none of the aggregated respondent characteristics is available
drop if mean_vote==. & mean_postsec==. & mean_age55==. & mean_male==. & mean_highinc==.

* One logit per election: was the unit's plurality forecast correct?
* The 2015 model uses i.boundary in place of i.reelected, and time enters only
* the 2015, 2019 and 2022 models.
eststo G1: logit correct_group mean_vote mean_postsec mean_male mean_age55 mean_highinc margin i.reelected size if election == "ca2011"
eststo G2: logit correct_group mean_vote mean_postsec mean_male mean_age55 mean_highinc margin i.boundary time size if election == "ca2015"
eststo G3: logit correct_group mean_vote mean_postsec mean_male mean_age55 mean_highinc margin i.reelected time size if election == "ca2019"
eststo G4: logit correct_group mean_vote mean_postsec mean_male mean_age55 mean_highinc margin i.reelected size if election == "on2011"
eststo G5: logit correct_group mean_vote mean_postsec mean_male mean_age55 mean_highinc margin i.reelected size if election == "on2014"
eststo G6: logit correct_group mean_vote mean_postsec mean_male mean_age55 mean_highinc margin i.reelected time size if election == "qc2022"

* Blank label so that no dependent-variable name is printed in the table
label variable correct_group " "

* LaTeX table of the six models.
* - refcat() takes name/heading pairs, so size is listed once, directly before
*   its heading.
* - stats() uses the lowercase names aic and bic, which estout computes itself;
*   logit does not store e(AIC) or e(BIC).
* - The pseudo-R2 label has balanced braces so that the table compiles.
* NOTE(review): icc2 is not returned by logit, so the ICC row is presumably
* empty -- confirm whether it should be kept.
esttab G1 G2 G3 G4 G5 G6 using "group_level.tex", wide drop(0.reelected 0.boundary) ///
    refcat(mean_vote "\textbf{Voted for winner?}" mean_postsec "\textbf{Sociodemographics}" margin "\textbf{Task difficulty}" size "\textbf{Decision making}" ll "Model fit statistics", nolabel) ///
    not width(\hsize) b(2) star(+ 0.10 * 0.05 ** 0.01 *** 0.001) label ///
    mtitles("CA 11" "CA 15\textsuperscript{(a)}" "CA 19" "ON 11\textsuperscript{(b)}" "ON 14\textsuperscript{(b)}" "QC 22") ///
    interaction("$\times$") style(tex) replace booktabs ///
    varlabels(_cons "\textbf{Constant}" mean_vote "\hspace{1em} \% Winner" mean_postsec "\hspace{1em} \% Postsecondary" mean_male "\hspace{1em} \% Male" mean_age55 "\hspace{1em} \% 55 years and over" mean_highinc "\hspace{1em} \% High income" margin "\hspace{1em} Margin of victory" 1.reelected "\hspace{1em} Reelected" 1.boundary "\hspace{1em} Boundary changes" time "\hspace{1em} Response date" size "\hspace{1em} Group size (log)" _cons) ///
    title("Citizens' forecasts for district-level elections, group level."\label{group}) ///
    order(mean_vote mean_postsec mean_male mean_age55 mean_highinc margin 1.reelected 1.boundary time size _cons) ///
    eqlab("" "") scalars(ll aic bic) ///
    stats(N icc2 ll aic bic r2_p, labels("Observations" "ICC" "Log likelihood" "AIC" "BIC" "Pseudo-R\textsuperscript{2}") fmt(%9.0fc %9.2fc %9.2f %9.2f %9.2f %9.2fc)) ///
    nonotes

eststo clear


********************************************************************************
** CREATE MEASURES OF DIVERSITY 
********************************************************************************

* EDUCATION DIVERSITY (CANADA 2011, 2015, 2019; ONTARIO 2011, 2014; QUEBEC 2022)
*
* For every survey: reduce the diversity file to one row per district, compute
* -sum(p*ln(p)) over the education_prop columns (presumably within-district
* shares of each education category), rescale the index to the 0-1 range and
* save it under the survey's own file name.

* Education category codes of each survey, in the order the terms are added
local educ_ca2011 1/8
local educ_ca2015 771/776
local educ_ca2019 901/911
local educ_on2011 1/8
local educ_on2014 991/994
local educ_qc2022 881/887

foreach svy in ca2011 ca2015 ca2019 on2011 on2014 qc2022 {

    use "`svy'_diversity.dta", clear

    * Keep a single row per district-by-category cell
    duplicates tag district_code education, gen(dup)
    duplicates drop district_code education, force
    keep district_code education education_prop
    drop if education == .

    * One row per district, one education_prop column per category code
    reshape wide education_prop, i(district_code) j(education)

    * Missing values become 1, so the corresponding term 1*ln(1) adds nothing
    recode * (.=1)

    * Collect the p*ln(p) terms as text and evaluate them in a single expression
    local plogp 0
    foreach k of numlist `educ_`svy'' {
        local plogp `plogp' + education_prop`k'*(ln(education_prop`k'))
    }
    gen diversity_educ = (`plogp')*-1

    * Min-max rescaling across districts
    sum diversity_educ
    replace diversity_educ = (diversity_educ - `r(min)') / (`r(max)'-`r(min)')

    save "`svy'_diversity_educ.dta", replace
}

* AGE DIVERSITY (CANADA 2011, 2015, 2019; ONTARIO 2011, 2014; QUEBEC 2022)
*
* Same steps for every survey: one row per district, -sum(p*ln(p)) over the six
* age_prop columns (age groups coded 1 to 6 in all surveys), min-max rescaling,
* then save under the survey's own file name.

foreach svy in ca2011 ca2015 ca2019 on2011 on2014 qc2022 {

    use "`svy'_diversity.dta", clear

    * Keep a single row per district-by-age-group cell
    duplicates tag district_code age_group, gen(dup)
    duplicates drop district_code age_group, force
    keep district_code age_group age_prop
    drop if age_group == .

    * One row per district, one age_prop column per age group
    reshape wide age_prop, i(district_code) j(age_group)

    * Missing values become 1, so the corresponding term 1*ln(1) adds nothing
    recode * (.=1)

    * Collect the p*ln(p) terms as text and evaluate them in a single expression
    local plogp 0
    foreach k of numlist 1/6 {
        local plogp `plogp' + age_prop`k'*(ln(age_prop`k'))
    }
    gen diversity_age = (`plogp')*-1

    * Min-max rescaling across districts
    sum diversity_age
    replace diversity_age = (diversity_age - `r(min)') / (`r(max)'-`r(min)')

    save "`svy'_diversity_age.dta", replace
}

* SEX DIVERSITY (CANADA 2011, 2015, 2019; ONTARIO 2011, 2014; QUEBEC 2022)
*
* Same steps for every survey: one row per district, -sum(p*ln(p)) over the two
* male_prop columns (male coded 0 and 1), min-max rescaling, then save under
* the survey's own file name.

foreach svy in ca2011 ca2015 ca2019 on2011 on2014 qc2022 {

    use "`svy'_diversity.dta", clear

    * Keep a single row per district-by-sex cell
    duplicates tag district_code male, gen(dup)
    duplicates drop district_code male, force
    keep district_code male male_prop
    drop if male == .

    * One row per district with the columns male_prop0 and male_prop1
    reshape wide male_prop, i(district_code) j(male)

    * Missing values become 1, so the corresponding term 1*ln(1) adds nothing
    recode * (.=1)

    * Collect the p*ln(p) terms as text and evaluate them in a single expression
    local plogp 0
    foreach k of numlist 0 1 {
        local plogp `plogp' + male_prop`k'*(ln(male_prop`k'))
    }
    gen diversity_male = (`plogp')*-1

    * Min-max rescaling across districts
    sum diversity_male
    replace diversity_male = (diversity_male - `r(min)') / (`r(max)'-`r(min)')

    save "`svy'_diversity_male.dta", replace
}

* INCOME DIVERSITY (CANADA 2011, 2015, 2019; ONTARIO 2011, 2014; QUEBEC 2022)
*
* For every survey: one row per district, -sum(p*ln(p)) over the income_prop
* columns (presumably within-district shares of each income category), min-max
* rescaling, then save under the survey's own file name.

* Income category codes of each survey, in the order the terms are added
local inc_ca2011 1/15
local inc_ca2015 771/776
local inc_ca2019 881/888
local inc_on2011 991/993
local inc_on2014 1001/1023
local inc_qc2022 881/888

foreach svy in ca2011 ca2015 ca2019 on2011 on2014 qc2022 {

    use "`svy'_diversity.dta", clear

    * Keep a single row per district-by-category cell
    duplicates tag district_code income, gen(dup)
    duplicates drop district_code income, force
    keep district_code income income_prop
    drop if income == .

    * One row per district, one income_prop column per category code
    reshape wide income_prop, i(district_code) j(income)

    * Missing values become 1, so the corresponding term 1*ln(1) adds nothing
    recode * (.=1)

    * Collect the p*ln(p) terms as text and evaluate them in a single expression
    local plogp 0
    foreach k of numlist `inc_`svy'' {
        local plogp `plogp' + income_prop`k'*(ln(income_prop`k'))
    }
    gen diversity_income = (`plogp')*-1

    * Min-max rescaling across districts
    sum diversity_income
    replace diversity_income = (diversity_income - `r(min)') / (`r(max)'-`r(min)')

    save "`svy'_diversity_income.dta", replace
}

* VOTE CHOICE DIVERSITY (CANADA 2011, 2015, 2019; ONTARIO 2011, 2014; QUEBEC 2022)
*
* For every survey: one row per district, -sum(p*ln(p)) over the choice_prop
* columns (presumably within-district shares of each vote choice), min-max
* rescaling, then save under the survey's own file name.

* Vote-choice codes of each survey, in the order the terms are added
local choice_ca2011 1/5 88 999
local choice_ca2015 1/5 88
local choice_ca2019 1/6 88
local choice_on2011 351/354 88 777 999
local choice_on2014 351/353 88 777 999
local choice_qc2022 241/245 88 99

foreach svy in ca2011 ca2015 ca2019 on2011 on2014 qc2022 {

    use "`svy'_diversity.dta", clear

    * Keep a single row per district-by-choice cell
    duplicates tag district_code vote_choice, gen(dup)
    duplicates drop district_code vote_choice, force
    keep district_code vote_choice choice_prop

    * Remove cells without a vote-choice code. The category variable is named
    * in full: the short form choice would resolve by abbreviation to
    * choice_prop and test the share instead of the category.
    drop if vote_choice == .

    * One row per district, one choice_prop column per vote-choice code
    reshape wide choice_prop, i(district_code) j(vote_choice)

    * Missing values become 1, so the corresponding term 1*ln(1) adds nothing
    recode * (.=1)

    * Collect the p*ln(p) terms as text and evaluate them in a single expression
    local plogp 0
    foreach k of numlist `choice_`svy'' {
        local plogp `plogp' + choice_prop`k'*(ln(choice_prop`k'))
    }
    gen diversity_choice = (`plogp')*-1

    * Min-max rescaling across districts
    sum diversity_choice
    replace diversity_choice = (diversity_choice - `r(min)') / (`r(max)'-`r(min)')

    save "`svy'_diversity_choice.dta", replace
}

* PID DIVERSITY (CANADA 2015, 2019)
*
* For both surveys: one row per district, -sum(p*ln(p)) over the pid_prop
* columns (presumably within-district shares of each party identification),
* min-max rescaling, then save under the survey's own file name.

* PID codes of each survey, in the order the terms are added. For 2019 the term
* for code 0 is pid_prop0*ln(pid_prop0), i.e. the weight and the logarithm refer
* to the same category, as in every other term.
local pid_ca2015 1/6
local pid_ca2019 0/6 88

foreach svy in ca2015 ca2019 {

    use "`svy'_diversity.dta", clear

    * Keep a single row per district-by-PID cell
    duplicates tag district_code pid, gen(dup)
    duplicates drop district_code pid, force
    keep district_code pid pid_prop
    drop if pid == .

    * One row per district, one pid_prop column per PID code
    reshape wide pid_prop, i(district_code) j(pid)

    * Missing values become 1, so the corresponding term 1*ln(1) adds nothing
    recode * (.=1)

    * Collect the p*ln(p) terms as text and evaluate them in a single expression
    local plogp 0
    foreach k of numlist `pid_`svy'' {
        local plogp `plogp' + pid_prop`k'*(ln(pid_prop`k'))
    }
    gen diversity_pid = (`plogp')*-1

    * Min-max rescaling across districts
    sum diversity_pid
    replace diversity_pid = (diversity_pid - `r(min)') / (`r(max)'-`r(min)')

    save "`svy'_diversity_pid.dta", replace
}

* TIME DIVERSITY (CANADA 2015, 2019; QUEBEC 2022)
*
* For every survey: one row per district, -sum(p*ln(p)) over the time_prop
* columns (presumably within-district shares of each response-date value),
* min-max rescaling, then save under the survey's own file name.

* Values of time used for each survey, in the order the terms are added.
* NOTE(review): the 2019 list skips 40 -- presumably that value does not occur
* in the data; confirm.
local time_ca2015 0/54
local time_ca2019 0/39 41
local time_qc2022 0/25

foreach svy in ca2015 ca2019 qc2022 {

    use "`svy'_diversity.dta", clear

    * Keep a single row per district-by-time cell
    duplicates tag district_code time, gen(dup)
    duplicates drop district_code time, force
    keep district_code time time_prop
    drop if time == .

    * One row per district, one time_prop column per value of time
    reshape wide time_prop, i(district_code) j(time)

    * Missing values become 1, so the corresponding term 1*ln(1) adds nothing
    recode * (.=1)

    * Collect the p*ln(p) terms as text and evaluate them in a single expression
    local plogp 0
    foreach k of numlist `time_`svy'' {
        local plogp `plogp' + time_prop`k'*(ln(time_prop`k'))
    }
    gen diversity_time = (`plogp')*-1

    * Min-max rescaling across districts
    sum diversity_time
    replace diversity_time = (diversity_time - `r(min)') / (`r(max)'-`r(min)')

    save "`svy'_diversity_time.dta", replace
}

* INTEREST DIVERSITY (CANADA 2015)
*
* Builds diversity_interest = -sum(p * ln(p)) over interest_prop0-
* interest_prop10 for each district, rescales it to 0-1 and saves it to
* ca2015_diversity_interest.dta.

use "ca2015_diversity.dta", clear

* One row per district x interest cell. The dup tag is not used afterwards:
* the -keep- below discards it.
duplicates tag district_code interest, gen(dup)
duplicates drop district_code interest, force

keep district_code interest interest_prop

drop if interest == .

reshape wide interest_prop, i(district_code) j(interest)

* A cell that is absent for a district is missing after the reshape; setting
* it to 1 makes its term 1*ln(1) = 0. Only the share columns are recoded:
* the previous -recode *- would also have turned a missing district_code
* into district 1.
recode interest_prop* (.=1)

* Same 11 terms (0..10), in the same order, as the former hand-typed sum.
local entropy_terms
local plus
forvalues k = 0/10 {
    local entropy_terms `entropy_terms' `plus' interest_prop`k'*(ln(interest_prop`k'))
    local plus +
}

gen diversity_interest = (`entropy_terms')*-1

* Min-max rescale using the range reported by -summarize-.
summarize diversity_interest
replace diversity_interest = (diversity_interest - `r(min)') / (`r(max)'-`r(min)')

save "ca2015_diversity_interest.dta", replace

* INTEREST DIVERSITY (CANADA 2019)
*
* Builds diversity_interest = -sum(p * ln(p)) over interest_prop0-
* interest_prop10 for each district, rescales it to 0-1 and saves it to
* ca2019_diversity_interest.dta.

use "ca2019_diversity.dta", clear

* One row per district x interest cell. The dup tag is not used afterwards:
* the -keep- below discards it.
duplicates tag district_code interest, gen(dup)
duplicates drop district_code interest, force

keep district_code interest interest_prop

drop if interest == .

reshape wide interest_prop, i(district_code) j(interest)

* A cell that is absent for a district is missing after the reshape; setting
* it to 1 makes its term 1*ln(1) = 0. Only the share columns are recoded:
* the previous -recode *- would also have turned a missing district_code
* into district 1.
recode interest_prop* (.=1)

* Same 11 terms (0..10), in the same order, as the former hand-typed sum.
local entropy_terms
local plus
forvalues k = 0/10 {
    local entropy_terms `entropy_terms' `plus' interest_prop`k'*(ln(interest_prop`k'))
    local plus +
}

gen diversity_interest = (`entropy_terms')*-1

* Min-max rescale using the range reported by -summarize-.
summarize diversity_interest
replace diversity_interest = (diversity_interest - `r(min)') / (`r(max)'-`r(min)')

save "ca2019_diversity_interest.dta", replace


********************************************************************************
** CREATE MEASURES OF DIVERSITY: FSA 
********************************************************************************

* EDUCATION DIVERSITY (QUEBEC 2022)
*
* Per postal_code: diversity_fsa_educ = -sum(p * ln(p)) over the education
* codes 881-887, min-max rescaled and saved to qc2022_fsa_diversity_educ.dta.

use "qc2022_fsa_diversity.dta", clear

* Collapse to one row per postal_code x education cell.
duplicates tag postal_code education, gen(dup)
duplicates drop postal_code education, force

keep postal_code education education_fsa_prop
drop if education == .

reshape wide education_fsa_prop, i(postal_code) j(education)

* Collect the wide share columns and the matching p*ln(p) terms.
local shares
local entropy_terms
local plus
forvalues code = 881/887 {
    local shares `shares' education_fsa_prop`code'
    local entropy_terms `entropy_terms' `plus' education_fsa_prop`code'*(ln(education_fsa_prop`code'))
    local plus +
}

* A category absent from a postal code is missing after the reshape;
* a share of 1 contributes 1*ln(1) = 0 to the sum.
recode `shares' (.=1)

generate diversity_fsa_educ = (`entropy_terms')*-1

* Min-max rescale with the range reported by -summarize-.
summarize diversity_fsa_educ
replace diversity_fsa_educ = (diversity_fsa_educ - `r(min)') / (`r(max)'-`r(min)')

save "qc2022_fsa_diversity_educ.dta", replace

* AGE DIVERSITY (QUEBEC 2022)
*
* Per postal_code: diversity_fsa_age = -sum(p * ln(p)) over age groups 1-6,
* min-max rescaled and saved to qc2022_fsa_diversity_age.dta.

use "qc2022_fsa_diversity.dta", clear

* Collapse to one row per postal_code x age_group cell.
duplicates tag postal_code age_group, gen(dup)
duplicates drop postal_code age_group, force

keep postal_code age_group age_fsa_prop
drop if age_group == .

reshape wide age_fsa_prop, i(postal_code) j(age_group)

* Collect the wide share columns and the matching p*ln(p) terms.
local shares
local entropy_terms
local plus
forvalues grp = 1/6 {
    local shares `shares' age_fsa_prop`grp'
    local entropy_terms `entropy_terms' `plus' age_fsa_prop`grp'*(ln(age_fsa_prop`grp'))
    local plus +
}

* A group absent from a postal code is missing after the reshape;
* a share of 1 contributes 1*ln(1) = 0 to the sum.
recode `shares' (.=1)

generate diversity_fsa_age = (`entropy_terms')*-1

* Min-max rescale with the range reported by -summarize-.
summarize diversity_fsa_age
replace diversity_fsa_age = (diversity_fsa_age - `r(min)') / (`r(max)'-`r(min)')

save "qc2022_fsa_diversity_age.dta", replace

* SEX DIVERSITY (QUEBEC 2022)
*
* Per postal_code: diversity_fsa_male = -sum(p * ln(p)) over male = 0/1,
* min-max rescaled and saved to qc2022_fsa_diversity_male.dta.

use "qc2022_fsa_diversity.dta", clear

* Collapse to one row per postal_code x male cell.
duplicates tag postal_code male, gen(dup)
duplicates drop postal_code male, force

keep postal_code male male_fsa_prop
drop if male == .

reshape wide male_fsa_prop, i(postal_code) j(male)

* Collect the two wide share columns and the matching p*ln(p) terms.
local shares
local entropy_terms
local plus
foreach sex in 0 1 {
    local shares `shares' male_fsa_prop`sex'
    local entropy_terms `entropy_terms' `plus' male_fsa_prop`sex'*(ln(male_fsa_prop`sex'))
    local plus +
}

* A category absent from a postal code is missing after the reshape;
* a share of 1 contributes 1*ln(1) = 0 to the sum.
recode `shares' (.=1)

generate diversity_fsa_male = (`entropy_terms')*-1

* Min-max rescale with the range reported by -summarize-.
summarize diversity_fsa_male
replace diversity_fsa_male = (diversity_fsa_male - `r(min)') / (`r(max)'-`r(min)')

save "qc2022_fsa_diversity_male.dta", replace

* INCOME DIVERSITY (QUEBEC 2022)
*
* Per postal_code: diversity_fsa_income = -sum(p * ln(p)) over the income
* codes 881-888, min-max rescaled and saved to
* qc2022_fsa_diversity_income.dta.

use "qc2022_fsa_diversity.dta", clear

* Collapse to one row per postal_code x income cell.
duplicates tag postal_code income, gen(dup)
duplicates drop postal_code income, force

keep postal_code income income_fsa_prop
drop if income == .

reshape wide income_fsa_prop, i(postal_code) j(income)

* Collect the wide share columns and the matching p*ln(p) terms.
local shares
local entropy_terms
local plus
forvalues code = 881/888 {
    local shares `shares' income_fsa_prop`code'
    local entropy_terms `entropy_terms' `plus' income_fsa_prop`code'*(ln(income_fsa_prop`code'))
    local plus +
}

* A category absent from a postal code is missing after the reshape;
* a share of 1 contributes 1*ln(1) = 0 to the sum.
recode `shares' (.=1)

generate diversity_fsa_income = (`entropy_terms')*-1

* Min-max rescale with the range reported by -summarize-.
summarize diversity_fsa_income
replace diversity_fsa_income = (diversity_fsa_income - `r(min)') / (`r(max)'-`r(min)')

save "qc2022_fsa_diversity_income.dta", replace

* VOTE CHOICE DIVERSITY (QUEBEC 2022)
*
* Per postal_code: diversity_fsa_choice = -sum(p * ln(p)) over the
* vote_choice codes 241-245, 88 and 99, min-max rescaled and saved to
* qc2022_fsa_diversity_choice.dta.

use "qc2022_fsa_diversity.dta", clear

* One row per postal_code x vote_choice cell. The dup tag is not used
* afterwards: the -keep- below discards it.
duplicates tag postal_code vote_choice, gen(dup)
duplicates drop postal_code vote_choice, force

keep postal_code vote_choice choice_fsa_prop

* Filter on the reshape key itself. The previous "drop if choice == ."
* named no existing variable; by abbreviation it resolved to
* choice_fsa_prop, so rows with a missing vote_choice were never removed
* here.
drop if vote_choice == .

reshape wide choice_fsa_prop, i(postal_code) j(vote_choice)

* A category absent from a postal code is missing after the reshape;
* a share of 1 contributes 1*ln(1) = 0 to the sum.
recode choice_fsa_prop241 choice_fsa_prop242 choice_fsa_prop243 choice_fsa_prop244 choice_fsa_prop245 choice_fsa_prop88 choice_fsa_prop99 (.=1)

gen diversity_fsa_choice = (choice_fsa_prop241*(ln(choice_fsa_prop241)) + choice_fsa_prop242*(ln(choice_fsa_prop242)) + choice_fsa_prop243*(ln(choice_fsa_prop243)) + choice_fsa_prop244*(ln(choice_fsa_prop244)) + choice_fsa_prop245*(ln(choice_fsa_prop245)) + choice_fsa_prop88*(ln(choice_fsa_prop88)) + choice_fsa_prop99*(ln(choice_fsa_prop99)))*-1

* Min-max rescale with the range reported by -summarize-.
sum diversity_fsa_choice
replace diversity_fsa_choice = (diversity_fsa_choice - `r(min)') / (`r(max)'-`r(min)')

save "qc2022_fsa_diversity_choice.dta", replace

* TIME DIVERSITY (QUEBEC 2022)
*
* Per postal_code: diversity_fsa_time = -sum(p * ln(p)) over time = 0-25,
* min-max rescaled and saved to qc2022_fsa_diversity_time.dta.

use "qc2022_fsa_diversity.dta", clear

* Collapse to one row per postal_code x time cell.
duplicates tag postal_code time, gen(dup)
duplicates drop postal_code time, force

keep postal_code time time_fsa_prop
drop if time == .

reshape wide time_fsa_prop, i(postal_code) j(time)

* Collect the wide share columns and the matching p*ln(p) terms.
local shares
local entropy_terms
local plus
forvalues t = 0/25 {
    local shares `shares' time_fsa_prop`t'
    local entropy_terms `entropy_terms' `plus' time_fsa_prop`t'*(ln(time_fsa_prop`t'))
    local plus +
}

* A time value absent from a postal code is missing after the reshape;
* a share of 1 contributes 1*ln(1) = 0 to the sum.
recode `shares' (.=1)

generate diversity_fsa_time = (`entropy_terms')*-1

* Min-max rescale with the range reported by -summarize-.
summarize diversity_fsa_time
replace diversity_fsa_time = (diversity_fsa_time - `r(min)') / (`r(max)'-`r(min)')

save "qc2022_fsa_diversity_time.dta", replace


********************************************************************************
** MERGE ALL DIVERSITY MEASURES IN ONE DATASET 
********************************************************************************

* CANADA 2011
*
* Joins the five ca2011 diversity indices by district, adds the rescaled
* composite diversity_all, then attaches the group-level covariates taken
* from aggregate.dta.

* Park the ca2011 rows of the aggregate file; they are merged back in below.
use "aggregate.dta", clear
keep if election == "ca2011"
save "ca2011_diversity_merge.dta", replace

* Start from education and bolt on the remaining indices one by one.
use "ca2011_diversity_educ.dta", clear
foreach dim in age male income choice {
    merge 1:1 district_code using ca2011_diversity_`dim', nogenerate
}

generate election = "ca2011"

* Composite: plain sum of the components, min-max rescaled.
generate diversity_all = diversity_educ + diversity_age + diversity_male ///
    + diversity_income + diversity_choice
summarize diversity_all
replace diversity_all = (diversity_all - `r(min)') / (`r(max)'-`r(min)')

save "ca2011_diversity_all.dta", replace

* Bring in the covariates and overwrite the parked file with the result.
merge 1:1 district_code using ca2011_diversity_merge, ///
    keepusing(correct_group margin reelected size type) nogenerate

save "ca2011_diversity_merge.dta", replace

* CANADA 2015
*
* Joins the eight ca2015 diversity indices by district, adds two rescaled
* composites (diversity_all and diversity_all2), then attaches the
* group-level covariates taken from aggregate.dta.

* Park the ca2015 rows of the aggregate file; they are merged back in below.
use "aggregate.dta", clear
keep if election == "ca2015"
save "ca2015_diversity_merge.dta", replace

* Start from education and bolt on the remaining indices one by one.
use "ca2015_diversity_educ.dta", clear
foreach dim in age male income choice pid time interest {
    merge 1:1 district_code using ca2015_diversity_`dim', nogenerate
}

generate election = "ca2015"

* Composite 1: demographics + vote choice + time.
generate diversity_all = diversity_educ + diversity_age + diversity_male ///
    + diversity_income + diversity_choice + diversity_time
summarize diversity_all
replace diversity_all = (diversity_all - `r(min)') / (`r(max)'-`r(min)')

* Composite 2: swaps vote choice for party id and adds interest.
generate diversity_all2 = diversity_educ + diversity_age + diversity_male ///
    + diversity_income + diversity_pid + diversity_time + diversity_interest
summarize diversity_all2
replace diversity_all2 = (diversity_all2 - `r(min)') / (`r(max)'-`r(min)')

save "ca2015_diversity_all.dta", replace

* Bring in the covariates (boundary rather than reelected for this
* election) and overwrite the parked file with the result.
merge 1:1 district_code using ca2015_diversity_merge, ///
    keepusing(correct_group margin boundary size type) nogenerate

save "ca2015_diversity_merge.dta", replace

* CANADA 2019
*
* Joins the eight ca2019 diversity indices by district, adds two rescaled
* composites (diversity_all and diversity_all2), then attaches the
* group-level covariates taken from aggregate.dta.

* Park the ca2019 rows of the aggregate file; they are merged back in below.
use "aggregate.dta", clear
keep if election == "ca2019"
save "ca2019_diversity_merge.dta", replace

* Start from education and bolt on the remaining indices one by one.
use "ca2019_diversity_educ.dta", clear
foreach dim in age male income choice pid time interest {
    merge 1:1 district_code using ca2019_diversity_`dim', nogenerate
}

generate election = "ca2019"

* Composite 1: demographics + vote choice + time.
generate diversity_all = diversity_educ + diversity_age + diversity_male ///
    + diversity_income + diversity_choice + diversity_time
summarize diversity_all
replace diversity_all = (diversity_all - `r(min)') / (`r(max)'-`r(min)')

* Composite 2: swaps vote choice for party id and adds interest.
generate diversity_all2 = diversity_educ + diversity_age + diversity_male ///
    + diversity_income + diversity_pid + diversity_time + diversity_interest
summarize diversity_all2
replace diversity_all2 = (diversity_all2 - `r(min)') / (`r(max)'-`r(min)')

save "ca2019_diversity_all.dta", replace

* Bring in the covariates and overwrite the parked file with the result.
merge 1:1 district_code using ca2019_diversity_merge, ///
    keepusing(correct_group margin reelected size type) nogenerate

save "ca2019_diversity_merge.dta", replace

* ONTARIO 2011
*
* Joins the five on2011 diversity indices by district, adds the rescaled
* composite diversity_all, then attaches the group-level covariates taken
* from aggregate.dta.

* Park the on2011 rows of the aggregate file; they are merged back in below.
use "aggregate.dta", clear
keep if election == "on2011"
save "on2011_diversity_merge.dta", replace

* Start from education and bolt on the remaining indices one by one.
use "on2011_diversity_educ.dta", clear
foreach dim in age male income choice {
    merge 1:1 district_code using on2011_diversity_`dim', nogenerate
}

generate election = "on2011"

* Composite: plain sum of the components, min-max rescaled.
generate diversity_all = diversity_educ + diversity_age + diversity_male ///
    + diversity_income + diversity_choice
summarize diversity_all
replace diversity_all = (diversity_all - `r(min)') / (`r(max)'-`r(min)')

save "on2011_diversity_all.dta", replace

* Bring in the covariates and overwrite the parked file with the result.
merge 1:1 district_code using on2011_diversity_merge, ///
    keepusing(correct_group margin reelected size type) nogenerate

save "on2011_diversity_merge.dta", replace

* ONTARIO 2014
*
* Joins the five on2014 diversity indices by district, adds the rescaled
* composite diversity_all, then attaches the group-level covariates taken
* from aggregate.dta.

* Park the on2014 rows of the aggregate file; they are merged back in below.
use "aggregate.dta", clear
keep if election == "on2014"
save "on2014_diversity_merge.dta", replace

* Start from education and bolt on the remaining indices one by one.
use "on2014_diversity_educ.dta", clear
foreach dim in age male income choice {
    merge 1:1 district_code using on2014_diversity_`dim', nogenerate
}

generate election = "on2014"

* Composite: plain sum of the components, min-max rescaled.
generate diversity_all = diversity_educ + diversity_age + diversity_male ///
    + diversity_income + diversity_choice
summarize diversity_all
replace diversity_all = (diversity_all - `r(min)') / (`r(max)'-`r(min)')

save "on2014_diversity_all.dta", replace

* Bring in the covariates and overwrite the parked file with the result.
merge 1:1 district_code using on2014_diversity_merge, ///
    keepusing(correct_group margin reelected size type) nogenerate

save "on2014_diversity_merge.dta", replace

* QUEBEC 2022
*
* Joins the six qc2022 district-level diversity indices, adds the rescaled
* composite diversity_all, then attaches the group-level covariates taken
* from aggregate.dta.

* Park the qc2022 rows of the aggregate file; they are merged back in below.
use "aggregate.dta", clear
keep if election == "qc2022"
save "qc2022_diversity_merge.dta", replace

* Start from education and bolt on the remaining indices one by one.
use "qc2022_diversity_educ.dta", clear
foreach dim in age male income choice time {
    merge 1:1 district_code using qc2022_diversity_`dim', nogenerate
}

generate election = "qc2022"

* Composite: plain sum of the components, min-max rescaled.
generate diversity_all = diversity_educ + diversity_age + diversity_male ///
    + diversity_income + diversity_choice + diversity_time
summarize diversity_all
replace diversity_all = (diversity_all - `r(min)') / (`r(max)'-`r(min)')

save "qc2022_diversity_all.dta", replace

* Bring in the covariates and overwrite the parked file with the result.
merge 1:1 district_code using qc2022_diversity_merge, ///
    keepusing(correct_group margin reelected size type) nogenerate

save "qc2022_diversity_merge.dta", replace

* QUEBEC 2022 FSA
*
* Same assembly as the district-level files, keyed on postal_code: joins the
* six FSA diversity indices, adds the rescaled composite diversity_fsa_all,
* then attaches the covariates taken from aggregate_fsa.dta.

* Park the qc2022 rows of the FSA aggregate file; merged back in below.
use "aggregate_fsa.dta", clear
keep if election == "qc2022"
save "qc2022_fsa_diversity_merge.dta", replace

* Start from education and bolt on the remaining indices one by one.
use "qc2022_fsa_diversity_educ.dta", clear
foreach dim in age male income choice time {
    merge 1:1 postal_code using qc2022_fsa_diversity_`dim', nogenerate
}

generate election = "qc2022"

* Composite: plain sum of the components, min-max rescaled.
generate diversity_fsa_all = diversity_fsa_educ + diversity_fsa_age ///
    + diversity_fsa_male + diversity_fsa_income + diversity_fsa_choice ///
    + diversity_fsa_time
summarize diversity_fsa_all
replace diversity_fsa_all = (diversity_fsa_all - `r(min)') / (`r(max)'-`r(min)')

save "qc2022_fsa_diversity_all.dta", replace

* Bring in the covariates (district_code included, so each postal code can
* be linked to a district) and overwrite the parked file with the result.
merge 1:1 postal_code using qc2022_fsa_diversity_merge, ///
    keepusing(district_code correct_group margin reelected size type) nogenerate

save "qc2022_fsa_diversity_merge.dta", replace


********************************************************************************
** NEW FILES
********************************************************************************

* Stack the six district-level election files. aggregate.dta is appended as
* well, so its rows (which carry no diversity indices) are part of
* diversity_merge.dta; later steps drop rows with a missing diversity_educ.
use "ca2011_diversity_merge.dta", clear

append using ///
    ca2015_diversity_merge ///
    ca2019_diversity_merge ///
    on2011_diversity_merge ///
    on2014_diversity_merge ///
    qc2022_diversity_merge ///
    aggregate, force

* Common column set shared with the FSA file.
local keepers election postal_code district_code type correct_group margin ///
    reelected boundary size diversity_educ diversity_age diversity_male ///
    diversity_income diversity_choice diversity_time diversity_interest ///
    diversity_pid diversity_all diversity_all2
keep `keepers'

save "diversity_merge.dta", replace

* Put the FSA file on the same column layout as diversity_merge.dta so the
* two can be appended.
use "qc2022_fsa_diversity_merge.dta", clear

append using aggregate_fsa, force

* Drop the _fsa infix from the index names.
foreach dim in educ age male income choice time all {
    rename diversity_fsa_`dim' diversity_`dim'
}

* Indices that were not computed at the FSA level: create them as missing.
foreach absent in interest pid all2 {
    generate diversity_`absent' = .
}

local keepers election postal_code district_code type correct_group margin ///
    reelected boundary size diversity_educ diversity_age diversity_male ///
    diversity_income diversity_choice diversity_time diversity_interest ///
    diversity_pid diversity_all diversity_all2
keep `keepers'

* Relabel so FSA rows can be told apart from district-level qc2022 rows.
* The label is spelled "qc2022FAS" (sic); the melogit model further down
* filters on exactly this string, so the two must stay in sync.
replace election = "qc2022FAS" if election == "qc2022"

save "fsa_diversity_merge.dta", replace

* Combine the district-level and FSA-level files into the dataset used by
* the group-level models.
* -use- takes -clear-, not -replace-: the previous "use ..., replace" is
* rejected as an invalid option.
use "diversity_merge.dta", clear

append using fsa_diversity_merge, force

* Remove rows that carry no diversity indices (the appended aggregate rows).
drop if diversity_educ==.

save "diversity_merge_with_fsa.dta", replace


********************************************************************************
** NEW FILE
********************************************************************************

* District-level diversity indices, restricted to rows that have them.
use "diversity_merge.dta", clear
keep district_code election type ///
    diversity_educ diversity_age diversity_male diversity_income ///
    diversity_choice diversity_pid diversity_all diversity_all2 ///
    diversity_time diversity_interest
drop if diversity_educ == .
save "diversity_merge2.dta", replace

* Analysis subset of merge.dta (many rows per district x election, given
* the m:1 merge below).
use "merge.dta", clear
keep district_code district_outcome forecast_district election type ///
    correct_district vote_district pidscale_district pidstatus_district ///
    interest interest_n univ postsec male age55 highinc ///
    margin margin_z boundary reelected time survey_weight
save "merge2.dta", replace

* Attach the district-level indices to every row of the subset. The data
* just saved are still in memory, so no reload is needed.
merge m:1 district_code election using diversity_merge2, nogenerate

save "merge2.dta", replace


********************************************************************************
** GROUP-LEVEL ANALYSES WITH DIVERSITY MEASURES
********************************************************************************

* Group-level models of correct_group on the diversity indices, one per
* election: logits for the district-level files (D1-D6) and, for the FSA
* rows (D7), a mixed-effects logit with a random intercept by district_code.
use "diversity_merge_with_fsa.dta", clear

* Regressor blocks shared by all specifications; the order inside each
* model is the same as when the lists were typed out in full.
local lead diversity_choice diversity_educ
local demo diversity_age diversity_male diversity_income

eststo D1: logit correct_group `lead' `demo' margin i.reelected size ///
    if election == "ca2011"
eststo D2: logit correct_group `lead' diversity_interest `demo' diversity_time margin i.boundary size ///
    if election == "ca2015"
eststo D3: logit correct_group `lead' diversity_interest `demo' diversity_time margin i.reelected size ///
    if election == "ca2019"
eststo D4: logit correct_group `lead' `demo' margin i.reelected size ///
    if election == "on2011"
eststo D5: logit correct_group `lead' `demo' margin i.reelected size ///
    if election == "on2014"
eststo D6: logit correct_group `lead' `demo' diversity_time margin i.reelected size ///
    if election == "qc2022"
eststo D7: melogit correct_group `lead' `demo' diversity_time margin i.reelected size ///
    if election == "qc2022FAS" || district_code:

* Blank out the dependent-variable label before building the table.
label variable correct_group " "

* Export models D1-D7 to diversity.tex.
* Fix: the pseudo-R2 label used to read "Pseudo-R}\textsuperscript{2}"; the
* stray closing brace produced unbalanced braces in the generated LaTeX.
* All other options are unchanged, only split across lines.
esttab D1 D2 D3 D4 D5 D6 D7 using "diversity.tex", ///
    wide drop(0.reelected 0.boundary) ///
    refcat(diversity_choice "\textbf{Diversity}" margin "\textbf{Task difficulty}" size "\textbf{Decision making}" ll "Model fit statistics", nolabel) ///
    not width(\hsize) b(2) star(+ 0.10 * 0.05 ** 0.01 *** 0.001) label ///
    mtitles("CA 11" "CA 15\textsuperscript{(a)}" "CA 19" "ON 11\textsuperscript{(b)}" "ON 14\textsuperscript{(b)}" "QC 22" "QC 22-FSA") ///
    interaction("$\times$") style(tex) replace booktabs ///
    varlabels(_cons "\textbf{Constant}" diversity_choice "\hspace{1em} Vote choice" diversity_educ "\hspace{1em} Education" diversity_interest "\hspace{1em} Interest" diversity_male "\hspace{1em} Sex" diversity_age "\hspace{1em} Age group" diversity_income "\hspace{1em} Income" margin "\hspace{1em} Margin of victory" 1.reelected "\hspace{1em} Reelected" 1.boundary "\hspace{1em} Boundary changes" diversity_time "\hspace{1em} Response date" size "\hspace{1em} Logged group size" _cons) ///
    title("Citizens' forecasts for district-level elections, group level."\label{diversity}) ///
    order(diversity_choice diversity_educ diversity_interest diversity_age diversity_male diversity_income diversity_time margin 1.reelected 1.boundary size _cons) ///
    eqlab("" "") scalars(ll aic bic) ///
    stats(N ll r2_p, labels("Observations" "Log likelihood" "Pseudo-R\textsuperscript{2}") fmt(%9.0fc %9.2fc %9.2fc)) ///
    nonotes

* Release the stored estimates and close the log opened at the top of the
* do-file.
eststo clear

log close